MacTech 1 to 12

home *** CD-ROM | disk | FTP | other *** search

/ MacTech 1 to 12 / MacTech-vol-1-12.toast / Source / MacTech® Magazine / Volume 06 - 1990 / 06.09 Sep 90 / Canon Sources / Fast Canon / Canon.c next >

Wrap

C/C++ Source or Header | 1990-02-07 | 24.8 KB | 1,213 lines | [TEXT/nX^n]

////////////////////////////////////////////////////////////////// // // Canon.c // ------- // New MPW Canon tool // ////////////////////////////////////////////////////////////////// #pragma load "managers" ////////////////////////////////////////////////////////////////// // // Constants and Macros // ////////////////////////////////////////////////////////////////// #define nil 0 #define stdinfd 0 #define stdoutfd 1 #define stderrfd 2 #define stdunit(x) ((x >= stdinfd) && (x <= stderrfd)) #define notstdunit(x) (x > stderrfd) #define nombuffsize 1024 #define truebuffsize 1200 #define classcount 15 #define idstate 7 #define blocksize 200 ////////////////////////////////////////////////////////////////// // // Types // ////////////////////////////////////////////////////////////////// typedef enum {false, true} logical; typedef enum {nocode, pascalcode, ccode} codetype; typedef struct node { char *key; struct node *left; struct node *right; int balance; char *data; } node; typedef struct block { struct block *previous; int free; node data[blocksize]; } block; ////////////////////////////////////////////////////////////////// // // Globals // ////////////////////////////////////////////////////////////////// unsigned char *CASETABLE; block *MEMORY = nil; Handle DICTIONARY; char NOTHING[] = ""; ////////////////////////////////////////////////////////////////// // // Prototypes // ////////////////////////////////////////////////////////////////// void initmac(); int openoutput(char *thename, int output); int readinput(int input, Handle inbuffer); int filter(char *inbuffer, int buffersize, int output, codetype language, node *symbols); int writeoutput(int output, char *outbuffer, int buffersize); node *parser(char *dictname, codetype language); char *gettoken(char *buffer, int buffersize, char *classtable, char *statetable, int *thetoken); node *createnode(char *thekey, char *thedata); unsigned int insert(node *parent, char *thekey, char *thedata, int depth); node *lookup(node *thetable, char *thekey); void destroy(); void dump(node *thenode); int compare(unsigned char *string1, unsigned char *string2); ////////////////////////////////////////////////////////////////// // // main // ---- // the "main" routine reads and interprets the command line, // reads the dictionary file, and filters the input // ////////////////////////////////////////////////////////////////// int main(int argc, char *argv[]) { int output; logical sensitive; codetype language; char *outputname; char *dictname; logical errors; int index; char *thetail; Handle thehandle; node *symbols; int input; int buffersize; initmac(); // "output" is the fd of the output file, initially stdout // "sensitive" is the case sensitivity, initially insensitive // "language" is the language to parse, initially unknown output = stdoutfd; sensitive = false; language = nocode; // "outputname" is the name of the output file // "dictname" is the name of the dictionary file // "errors" is the error flag, initially indicating no errors outputname = nil; dictname = nil; errors = false; // command line interpreter: first pass for (index = 1; index < argc; index++) { if (argv[index][0] == '-') { switch (argv[index][1]) { // "-p" and "-c" options set language type; // these override any previous setting case 'C': case 'c': language = ccode; break; case 'P': case 'p': language = pascalcode; break; // "-s" option make Canon case sensitive case 'S': case 's': sensitive = true; break; // "-o" option names the output file; remember the name // and read the file later case 'O': case 'o': index++; if (outputname == nil) outputname = argv[index]; else { fprintf(stderr, "Error - multiple output " "files %s and %s!\n", outputname, argv[index]); errors = true; } break; // "-d" option names the dictionary file; remember the name // and read the file later case 'D': case 'd': index++; if (dictname == nil) dictname = argv[index]; else { fprintf(stderr, "Error - multiple dictionary " "files %s and %s!\n", dictname, argv[index]); errors = true; } break; default: fprintf(stderr, "Error - Unknown option %s\n", argv[index]); errors = true; break; } } else if (language == nocode) { // argv[index] is the name of an input file; if // "language" has not changed since initialization, // set "language" according to file name thetail = argv[index] + strlen(argv[index]) - 2; if (strcmp(thetail, ".p") == 0) language = pascalcode; else if (strcmp(thetail, ".c") == 0) language = ccode; } } // exit if errors were found in the first pass if (errors) exit(2); // if "language" is still unknown, set it to Pascal if (language == nocode) language = pascalcode; // load the case table if (sensitive) thehandle = GetResource('TABL', 4002); else thehandle = GetResource('TABL', 4001); HLock(thehandle); CASETABLE = (unsigned char *)*thehandle; // copy the dictionary into the symbol table if (dictname == nil) { fprintf(stdout, "Error - no dictionary file specified!\n"); exit(2); } symbols = parser(dictname, language); if (symbols == nil) exit(2); // if "outputname" has been found, open the output file if (outputname != nil) { output = openoutput(argv[++index], output); if (output < 0) { fprintf(stderr, "Error - Unable to open " "output file %s!\n", argv[index]); exit(2); } } // "input" is the fd of the input file, initially stdin // "thehandle" is the input buffer, initially empty // "buffersize" is the size of "thehandle" input = stdinfd; thehandle = NewHandle(0); buffersize = 0; // command line interpreter: second pass for (index = 1; index < argc; index++) { // skip all options (read in first pass) if (argv[index][0] == '-') { switch (argv[index][1]) { case 'D': case 'O': case 'd': case 'o': index++; } } else { // argv[index] is the name of an input file; open // the file and read it into the input buffer input = open(argv[index], O_RDONLY); if (input < 0) { fprintf(stderr, "Error - Unable to open " "input file %s!\n", argv[index]); exit(2); } buffersize = readinput(input, thehandle); if (buffersize < 0) { fprintf(stderr, "Error - Reading from %s!\n", argv[index]); exit(2); } close(input); // call "filter" to read the input buffer and write // filtered output HLock(thehandle); filter(*thehandle, buffersize, output, language, symbols); HUnlock(thehandle); } } // if "input" is still a standard unit number, then no // input file was opened, and input must be from // standard input if (stdunit(input)) { buffersize = readinput(input, thehandle); if (buffersize < 0) { fprintf(stderr, "Error - Reading from standard input!\n"); exit(2); } // call "filter" to read the input buffer and write // filtered output HLock(thehandle); filter(*thehandle, buffersize, output, language, symbols); HUnlock(thehandle); } // wrapup: dispose of the input buffer, close "output" // if the program opened it and dispose of the symbol table DisposHandle(thehandle); if (notstdunit(output)) close(output); destroy(); exit(0); } ////////////////////////////////////////////////////////////////// // // initmac // ------- // initialize any necessary managers and whatnot. // ////////////////////////////////////////////////////////////////// void initmac() { InitGraf((Ptr)&qd.thePort); SetFScaleDisable(true); InitCursorCtl(nil); } ////////////////////////////////////////////////////////////////// // // openoutput // ---------- // open the output file. returns the fd or, if an error // occurs, the error flag. // ////////////////////////////////////////////////////////////////// int openoutput(char *thename, int output) { FInfo theinfo; // if "output" is not a standard unit, then an output // file must have already be open if (notstdunit(output)) { fprintf(stderr, "Warning - additional output " "file %s ignored!\n", thename); return(output); } // open the output file for writing (O_WRONLY), // creating it if necessary (O_CREAT) and // zeroing it otherwise (O_TRUNC) output = open(thename, O_WRONLY + O_CREAT + O_TRUNC); if (output < 0) return(output); // if the file was created by "open", it will be // untyped, so set the type to TEXT and MPS if (getfinfo(thename, 0, &theinfo)) { fprintf(stderr, "Warning - unable to get info for " "output file %s!\n", thename); return(output); } theinfo.fdType = 'TEXT'; theinfo.fdCreator = 'MPS '; if (setfinfo(thename, 0, &theinfo)) fprintf(stderr, "Warning - unable to set info for " "output file %s!\n", thename); return(output); } ////////////////////////////////////////////////////////////////// // // readinput // --------- // this routine reads an input file into the input buffer and // returns the new size of the buffer or, if a read error // occurs, the error flag. // ////////////////////////////////////////////////////////////////// int readinput(int input, Handle thehandle) { int buffersize; int readsize; buffersize = 0; SetHandleSize(thehandle, 1024); HLock(thehandle); while ((readsize = read(input, *thehandle + buffersize, 1024)) > 0) { buffersize += readsize; HUnlock(thehandle); SetHandleSize(thehandle, buffersize + 1024); HLock(thehandle); } if (readsize < 0) return(readsize); HUnlock(thehandle); SetHandleSize(thehandle, buffersize + 1024); return(buffersize); } ////////////////////////////////////////////////////////////////// // // filter // ------ // this routine does the main work of the program. // ////////////////////////////////////////////////////////////////// int filter(char *inbuffer, int buffersize, int output, codetype language, node *symbols) { int inposition; int outposition; int thetoken; node *thenode; int thelength; Handle thehandle; char *classtable; char *statetable; char outbuffer[truebuffsize]; int thestate; unsigned char thechar; int theclass; int newstate; int writesize; // “inposition” is the current read position // “outposition” is the current write position // “thetoken” is the position of the beginning of the // current identifier inposition = 0; outposition = 0; thetoken = 0; // “classtable” converts characters into classes thehandle = GetResource('TABL', 1001); HLock(thehandle); classtable = *thehandle; // “statetable” is the state machine if (language == pascalcode) thehandle = GetResource('TABL', 3001); else thehandle = GetResource('TABL', 3002); HLock(thehandle); statetable = *thehandle; // start the machine in state 0 thestate = 0; while (inposition < buffersize) { // read the next character, find its class // and the new state thechar = inbuffer[inposition++]; theclass = classtable[thechar]; newstate = statetable[classcount * thestate + theclass]; switch (newstate) { // found an identifier: if it is in the symbol table, // replace it with the table's data. Then go to // state 0. case -1: inposition--; outbuffer[outposition] = '\0'; thenode = lookup(symbols, &outbuffer[thetoken]); if (thenode != nil) { outposition -= strlen(&outbuffer[thetoken]); thelength = strlen(thenode->data); BlockMove((Ptr)thenode->data, &outbuffer[outposition], thelength); outposition += thelength; } newstate = 0; break; // retract if going from state 2 to state 0; otherwise, // copy input to output case 0: if (thestate == 2) inposition--; else outbuffer[outposition++] = thechar; break; // reading an identifier: if this is the beginning, // record the position for later use. Then, fall // through to the default case idstate: if (thestate != idstate) thetoken = outposition; // all other cases, copy input to output default: outbuffer[outposition++] = thechar; break; } // if the output buffer fills up, and we're not in the // middle of an identifier, write it to disk if ((outposition >= nombuffsize) && (thestate != idstate) && (newstate != idstate)) { outposition = writeoutput(output, outbuffer, outposition); if (outposition < 0) return(outposition); } thestate = newstate; } // write the output buffer to disk writesize = write(output, outbuffer, outposition); return(writesize); } ////////////////////////////////////////////////////////////////// // // writeoutput // ---------- // this routine flushes the output buffer by writing // it to the output file. It returns the new size of the // buffer or, if a write error occurs, the error flag. // ////////////////////////////////////////////////////////////////// int writeoutput(int output, char *outbuffer, int buffersize) { int writesize; writesize = write(output, outbuffer, nombuffsize); if (writesize < 0) return(writesize); buffersize -= writesize; BlockMove(outbuffer + writesize, outbuffer, buffersize); return(buffersize); } ////////////////////////////////////////////////////////////////// // // parser // ------ // this routine creates the symbol table and stores the // Canon dictionary in it. // ////////////////////////////////////////////////////////////////// node *parser(char *dictname, codetype language) { Handle thehandle; char *parsetable; char *classtable; char *statetable; int thefile; int buffersize; char *buffer; node *symbols; int thestate; int newstate; int theline; int errors; int thetoken; char *thekey; char *thedata; char *dummy; char **thestring; thedata = nil; // "parsetable" is the parser's state machine thehandle = GetResource('TABL', 1000); HLock(thehandle); parsetable = *thehandle; // "classtable" is the character class table thehandle = GetResource('TABL', 1001); HLock(thehandle); classtable = *thehandle; // "statetable" is the lexical state machine if (language == pascalcode) thehandle = GetResource('TABL', 2001); else thehandle = GetResource('TABL', 2002); HLock(thehandle); statetable = *thehandle; // open the dictionary file... thefile = open(dictname, O_RDONLY); if (thefile < 0) { fprintf(stderr, "Error - Unable to open " "dictionary file %s!\n", dictname); return(nil); } // and read it into the buffer DICTIONARY = NewHandle(0); buffersize = readinput(thefile, DICTIONARY); if (buffersize < 0) { fprintf(stderr, "Error - Unable to read %s!\n", dictname); close(thefile); return(nil); } close(thefile); HLock(DICTIONARY); buffer = (char *)*DICTIONARY; // "symbols" is the symbol table symbols = createnode(NOTHING, NOTHING); // start the machine in state 0, and on line 1 thestate = 0; theline = 1; errors = 0; // read the first identifier into “thekey” thekey = gettoken(buffer, buffersize, classtable, statetable, &thetoken); while (thetoken >= 0) { newstate = parsetable[3 * thestate + thetoken]; switch (newstate) { // if we got here from state 1, then we read only one // identifier; if from state 2, we read both “thekey” // and “thedata” // state 0 is the beginning of a line, so increment the // line counter and set “thestring” to “thekey” case 0: if (thestate == 1) thetoken = insert(symbols, thekey, thekey, 0); else if (thestate == 2) thetoken = insert(symbols, thekey, thedata, 0); if (thetoken == 4) { fprintf(stderr, "\n#\tDuplicate key in dictionary " "file\n\tFile \"%s\"; Line %d\n", dictname, theline); errors++; } theline++; thestring = &thekey; break; // having read one identifier into “thekey”, the // next one should go into “thedata” case 1: thestring = &thedata; break; // having read one identifier into “thekey”, and the // next one “thedata”, read anything else into // “dummy” case 2: thestring = &dummy; break; // case 3 is the error case; if we just got here, write // an error message case 3: if (thestate != newstate) fprintf(stderr, "\n#\tError in dictionary file\n" "\tFile \"%s\"; Line %d\n", dictname, theline); errors++; break; } thestate = newstate; *thestring = gettoken(buffer, buffersize, classtable, statetable, &thetoken); } if (errors > 0) { destroy(); fprintf(stderr, "\n%d errors found in dictionary\n", errors); return(nil); } // dump(symbols); return(symbols); } ////////////////////////////////////////////////////////////////// // // gettoken // -------- // This routine reads the dictionary file until it finds a // token (which it returns) or the end of the file (in // which case it returns -1). // ////////////////////////////////////////////////////////////////// char *gettoken(char *buffer, int buffersize, char *classtable, char *statetable, int *thetoken) { static int position = 0; int thestate; unsigned char thechar; int theclass; int newstate; int thestart; thestart = 0; // start the machine in state 0 thestate = 0; while (position < buffersize) { // read the next character, look up its class, // and get the new state thechar = buffer[position++]; theclass = classtable[thechar]; newstate = statetable[classcount * thestate + theclass]; switch (newstate) { // -3 => ERR, -2 => CR; in either case, just return the // the token number case -3: case -2: buffer[position - 1] = '\0'; *thetoken = - 1 - newstate; return(nil); // -1 => ID; return the token number // and the identifier in “thestring” case -1: position--; *thetoken = - 1 - newstate; return(&buffer[thestart]); case 0: if (thestate == 1) position--; break; case 1: break; case 2: break; } if (newstate != 2) buffer[position - 1] = '\0'; else if (thestate == 0) thestart = position - 1; thestate = newstate; } *thetoken = -10; return(nil); } ////////////////////////////////////////////////////////////////// // // createnode // ---------- // create and initialize a new node // ////////////////////////////////////////////////////////////////// node *createnode(char *thekey, char *thedata) { block *theblock; node *thenode; if (MEMORY && (MEMORY->free < blocksize)) thenode = &(MEMORY->data[MEMORY->free++]); else { theblock = (block *)NewPtr(sizeof(block)); if (theblock == nil) return(nil); theblock->previous = MEMORY; MEMORY = theblock; theblock->free = 1; thenode = &(MEMORY->data[0]); } thenode->key = thekey; thenode->balance = 0; thenode->left = nil; thenode->right = nil; thenode->data = thedata; return(thenode); } ////////////////////////////////////////////////////////////////// // // insert // ------ // Insert a new key into the table and correct balance // factors recursively. The routine returns a queue of // path directions. // ////////////////////////////////////////////////////////////////// unsigned int insert(node *parent, char *thekey, char *thedata, int depth) { int difference; node *high; unsigned int result; node *low; node *child; // if thekey matches this node, then return an error // (duplicate keys) difference = compare(thekey, parent->key); if (difference == 0) return(4); // if there's a slot for the new node, then create it and return // 1 if it is to the left of parent and 2 if to the right // else determine high, the next step in the search path if (difference < 0) { high = parent->left; if (high == nil) { parent->left = createnode(thekey, thedata); return(1); } } else { high = parent->right; if (high == nil) { parent->right = createnode(thekey, thedata); return(2); } } // now continue the search by calling “insert” recursively // if the result is 0 (no balancing needed at this level) or // the result is 4 (duplicate key), return result = insert(high, thekey, thedata, depth + 1); if ((result == 0) || (result == 4)) return(result); // the low 4 bits of “result” indicate the direction of the // search path leaving “high”; increment high's balance // if the path goes left, and decrement it if the path // goes right if (result % 16 == 1) high->balance++; else high->balance--; // if the balance is now zero, no further correction of balances // is needed, so return // if it's ±1, push the direction away from “parent” onto the // “result” stack and return it // if it's ±2, continue to balancing transformation switch (high->balance) { case 0: return(0); case 1: case -1: return((result << 4) + ((difference < 0) ? 1 : 2)); case 2: case -2: break; } // use the direction away from “high” to find “low”, then // switch on that direction and the next one // the easiest way to follow these transformations is to // refer to the pictures low = (result % 16 == 1) ? high->left : high->right; switch (result % 256) { case 0x11: // left-left case high->left = low->right; low->right = high; if (difference < 0) parent->left = low; else parent->right = low; high->balance = 0; low->balance = 0; return(0); case 0x12: // right-left case child = low->left; high->right = child->left; low->left = child->right; if (difference < 0) parent->left = child; else parent->right = child; child->left = high; child->right = low; child->balance = 0; low->balance = 0; high->balance = 0; result &= 0xF00; if (result == 0x100) low->balance = -1; else if (result == 0x200) high->balance = 1; return(0); case 0x21: // left-right case child = low->right; low->right = child->left; high->left = child->right; if (difference < 0) parent->left = child; else parent->right = child; child->left = low; child->right = high; child->balance = 0; low->balance = 0; high->balance = 0; result &= 0xF00; if (result == 0x100) high->balance = -1; else if (result == 0x200) low->balance = 1; return(0); case 0x22: // right-right case high->right = low->left; low->left = high; if (difference < 0) parent->left = low; else parent->right = low; high->balance = 0; low->balance = 0; return(0); } } ////////////////////////////////////////////////////////////////// // // lookup // ------ // Find a key in the table // ////////////////////////////////////////////////////////////////// node *lookup(node *thetable, char *thekey) { node *thenode; int difference; thenode = thetable; while (difference = compare(thekey, thenode->key)) { if (difference < 0) thenode = thenode->left; else thenode = thenode->right; if (thenode == nil) return(nil); } return(thenode); } ////////////////////////////////////////////////////////////////// // // destroy // ------- // Dispose of the table // ////////////////////////////////////////////////////////////////// void destroy() { block *theblock; block *newblock; DisposHandle(DICTIONARY); theblock = MEMORY; while (theblock) { newblock = theblock->previous; DisposPtr((Ptr)theblock); theblock = newblock; } } ////////////////////////////////////////////////////////////////// // // dump // ---- // Dump the table // ////////////////////////////////////////////////////////////////// void dump(node *thenode) { if (thenode == nil) return; dump(thenode->left); printf("key %s; data %s; left %s; right %s\n", thenode->key, thenode->data, thenode->left ? thenode->left->key : "nil", thenode->right ? thenode->right->key : "nil"); dump(thenode->right); } ////////////////////////////////////////////////////////////////// // // compare // ------- // replacement for “strcmp” routine; case sensitivity is // determined by the global transliteration table CASETABLE. // ////////////////////////////////////////////////////////////////// int compare(unsigned char *string1, unsigned char *string2) { register int char1; register int char2; register int difference; char1 = *string1++; char2 = *string2++; while (char1 || char2) { difference = CASETABLE[char1] - CASETABLE[char2]; if (difference) return(difference); char1 = *string1++; char2 = *string2++; } return(0); } //////////////////////////////////////////////////////////////////